Análisis de clases latentes exploratoria y comparativa sin y con predictores, sin pueblo originario, pero con año y recategorización de edad mujer y semana gestacional hito 1, caracterización de clases y medidas de ajuste
// NOTE(review): this chunk looks like a mangled extraction of an HTML
// <script src=...jquery.min.js></script> include followed by an inline
// <script> block; the first line is not valid JavaScript as written — confirm
// against the original .Rmd/HTML before reusing.
script src = "https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js" $(document).ready(function() {
// Prepend a (initially empty) overlay container used to zoom plots on click.
$('body').prepend('<div class=\"zoomDiv\"><img src=\"\" class=\"zoomImg\"></div>');
// onClick function for all plots (img's)
$('img:not(.zoomImg)').click(function() {
// Copy the clicked image's src into the overlay image at full width.
$('.zoomImg').attr('src', $(this).attr('src')).css({width: '100%'});
// Style the overlay as a centered, fixed-position modal with a drop shadow.
$('.zoomDiv').css({opacity: '1', width: 'auto', border: '1px solid white', borderRadius: '5px', position: 'fixed', top: '50%', left: '50%', marginRight: '-50%', transform: 'translate(-50%, -50%)', boxShadow: '0px 0px 50px #888888', zIndex: '50', overflow: 'auto', maxHeight: '100%'});
});
// onClick function for zoomImg
$('img.zoomImg').click(function() {
// Clicking the zoomed image collapses the overlay again.
$('.zoomDiv').css({opacity: '0', width: '0%'});
});
});
// NOTE(review): mangled extraction — the first line fuses an external script
// include (hideOutput.js) with the start of an inline script; confirm against
// the original document.
<script src="hideOutput.js"></script> $(document).ready(function() {
// Collect all chunks marked with class "fold".
$chunks = $('.fold');
$chunks.each(function () { // add button to source code chunks
// Chunks with class "s" get a Show/Hide toggle for their source code.
if ( $(this).hasClass('s') ) {
$('pre.r', this).prepend("<div class=\"showopt\">Show Source</div><br style=\"line-height:22px;\"/>");
$('pre.r', this).children('code').attr('class', 'folded');
} // add button to output chunks
// Chunks with class "o" get toggles for text output and for plots.
if ( $(this).hasClass('o') ) {
$('pre:not(.r)', this).has('code').prepend("<div class=\"showopt\">Show Output</div><br style=\"line-height:22px;\"/>");
$('pre:not(.r)', this).children('code:not(r)').addClass('folded'); // add button to plots
$(this).find('img').wrap('<pre class=\"plot\"></pre>');
$('pre.plot', this).prepend("<div class=\"showopt\">Show Plot</div><br style=\"line-height:22px;\"/>");
$('pre.plot', this).children('img').addClass('folded');
}
}); // hide all chunks when document is loaded
$('.folded').css('display', 'none') // function to toggle the visibility
// Toggle the sibling code/img visibility and swap the button label.
$('.showopt').click(function() {
var label = $(this).html();
if (label.indexOf("Show") >= 0) {
$(this).html(label.replace("Show", "Hide"));
} else {
$(this).html(label.replace("Hide", "Show"));
}
$(this).siblings('code, img').slideToggle('fast', 'swing');
});
}); Cargamos los datos
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 653048 34.9 1332317 71.2 897138 48.0
Vcells 1172769 9.0 8388608 64.0 2075132 15.9
load("data2_lca2_adj_sin_po_2023_05_11.RData")
Cargamos los paquetes
# knitr chunk options: show source code in the rendered document.
knitr::opts_chunk$set(echo = TRUE)

# Install a package if it is missing, then attach it.
# FIX: the original pattern `if(!require(pkg)){install.packages(pkg)}`
# installed missing packages but never attached them in the same session,
# so a fresh machine would still fail on first run.
ensure_pkg <- function(pkg) {
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg)
    library(pkg, character.only = TRUE)
  }
  invisible(TRUE)
}

# Attach order is preserved from the original script (later attaches mask
# earlier ones, e.g. tidyverse masking base/Hmisc functions).
ensure_pkg("poLCA")

# poLCAParallel lives on GitHub, not CRAN.
if (!require(poLCAParallel)) {
  devtools::install_github("QMUL/poLCAParallel@package")
  library(poLCAParallel)
}

for (pkg in c("compareGroups", "parallel", "Hmisc", "tidyverse")) {
  ensure_pkg(pkg)
}

# sjPlot was wrapped in try() in the original: failure to install is tolerated.
try(ensure_pkg("sjPlot"))

for (pkg in c("emmeans", "nnet", "here", "doParallel", "progress", "caret",
              "rpart", "rpart.plot", "partykit", "randomForest", "ggcorrplot",
              "polycor", "tableone", "broom", "plotly", "rsvg",
              "DiagrammeRsvg", "effects")) {
  ensure_pkg(pkg)
}
# If probs.start is set to NULL (the default) when calling poLCA, the function
# generates random starting values on each run. Repeated runs of poLCA will
# therefore typically produce results with the same estimated parameters
# (corresponding to the same maximum log-likelihood) but with the latent class
# labels reordered.
#https://drive.google.com/file/d/10njMh5JEcqaBgCnoZdJ1uk3uCEkocDez/view?usp=share_link
#http://daob.nl/wp-content/uploads/2015/07/ESRA-course-slides.pdf
#https://docs.google.com/document/d/1LVeDpAP6CfT3n8B6HhHcc_SRnzO0JBoT/edit
#bvr(ppio_newList$lc_entropy_best_model)
# probs.start: a list of matrices of class-conditional response probabilities
# to be used as starting values for the estimation algorithm. Each matrix
# corresponds to one manifest variable, with one row per latent class and one
# column per outcome. The default is NULL (random starts). Note that if
# nrep > 1, user-specified probs.start values are only used in the first of
# the nrep attempts.
# poLCA.reorder takes the list of starting values probs.start as its first
# argument and, as its second argument, a vector describing the desired
# reordering of the latent classes.
# Reorder starting values so classes come out sorted by decreasing population
# share; this keeps class labels stable between the exploratory and final fits.
new.probs.start <- poLCA.reorder(LCA_best_model_ppio$probs.start, order(LCA_best_model_ppio$P, decreasing = TRUE))
#new.probs.start <-poLCA.reorder(probs.start,c(4,1,3,2))
# Then run poLCA again, this time using the reordered starting values.
# nrep tells the program to repeat estimation that many times and keep the fit
# with the highest likelihood, to try to avoid local maxima (here nrep=1e3).
# maxiter - maximum number of iterations of the estimation algorithm.
# nrep - number of times to estimate the model using different probs.start
# values (default is one).
set.seed(2125)
# Refit with the same number of classes as the exploratory best model
# (length(LCA_best_model_ppio$P)); standard errors are computed (calc.se=TRUE).
LCA_best_model_mod<-
poLCA(f_preds, mydata_preds3, nclass=length(LCA_best_model_ppio$P),
maxiter=10000,tol=1e-5, na.rm=FALSE,nrep=1e3, verbose=TRUE, calc.se=TRUE,probs.start=new.probs.start)
Conditional item response (column) probabilities,
by outcome variable, for each class (row)
$AÑO
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6)
class 1: 0 0.1785 0.1890 0.2012 0.1918 0.2396
class 2: 0 0.1730 0.2265 0.1893 0.2260 0.1852
class 3: 0 0.9065 0.0000 0.0000 0.0935 0.0000
class 4: 0 0.1587 0.2312 0.1941 0.2319 0.1841
class 5: 0 0.0806 0.2830 0.1395 0.2428 0.2540
class 6: 0 0.1336 0.1706 0.2574 0.1197 0.3186
class 7: 0 0.1345 0.2222 0.2000 0.2674 0.1759
$CAUSAL
Pr(1) Pr(2) Pr(3) Pr(4)
class 1: 0 0.0052 0.0000 0.9948
class 2: 0 0.0738 0.9262 0.0000
class 3: 0 0.7475 0.2525 0.0000
class 4: 0 0.0906 0.9094 0.0000
class 5: 0 0.9549 0.0451 0.0000
class 6: 0 0.0773 0.0000 0.9227
class 7: 0 0.3372 0.6628 0.0000
$EDAD_MUJER_REC
Pr(1) Pr(2) Pr(3) Pr(4)
class 1: 0.3490 0.3489 0.2125 0.0896
class 2: 0.5018 0.0000 0.0339 0.4643
class 3: 0.5806 0.0191 0.1478 0.2525
class 4: 0.5097 0.0237 0.1777 0.2888
class 5: 0.6103 0.0037 0.1588 0.2272
class 6: 0.5281 0.1565 0.1915 0.1240
class 7: 0.5710 0.0105 0.1271 0.2914
$PAIS_ORIGEN_REC
Pr(1) Pr(2) Pr(3)
class 1: 0.0000 0.9926 0.0074
class 2: 0.0000 0.9291 0.0709
class 3: 0.0734 0.8304 0.0962
class 4: 0.0000 0.9802 0.0198
class 5: 0.0000 0.8830 0.1170
class 6: 0.0000 0.0000 1.0000
class 7: 0.0000 0.0000 1.0000
$HITO1_EDAD_GEST_SEM_REC
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6)
class 1: 0.0089 0.7795 0.2117 0.0000 0.0000 0.0000
class 2: 0.0073 0.0019 0.6147 0.2568 0.1008 0.0185
class 3: 0.0442 0.0266 0.1127 0.2416 0.2731 0.3017
class 4: 0.0056 0.0000 0.3321 0.3547 0.2286 0.0790
class 5: 0.0737 0.1889 0.2347 0.5027 0.0000 0.0000
class 6: 0.0212 0.7912 0.1877 0.0000 0.0000 0.0000
class 7: 0.0068 0.0056 0.3278 0.3742 0.2248 0.0608
$MACROZONA
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6)
class 1: 0.0037 0.3846 0.1749 0.1450 0.0862 0.2056
class 2: 0.0000 0.7028 0.0998 0.0583 0.0683 0.0708
class 3: 0.0320 0.1731 0.3806 0.1545 0.0807 0.1792
class 4: 0.0000 0.3323 0.1717 0.2136 0.0881 0.1944
class 5: 0.0000 0.4044 0.1554 0.1661 0.1154 0.1587
class 6: 0.0059 0.5592 0.0824 0.0164 0.2961 0.0400
class 7: 0.0000 0.6280 0.0706 0.0305 0.2457 0.0252
$PREV_TRAMO_REC
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5)
class 1: 0.0018 0.0698 0.7236 0.1996 0.0052
class 2: 0.0000 0.8098 0.0000 0.1793 0.0108
class 3: 0.0209 0.0357 0.6387 0.2600 0.0447
class 4: 0.0008 0.0063 0.6416 0.3513 0.0000
class 5: 0.0000 0.0914 0.5580 0.3506 0.0000
class 6: 0.0293 0.0051 0.5217 0.1622 0.2817
class 7: 0.0055 0.0000 0.6053 0.2937 0.0955
Estimated class population shares
0.1523 0.118 0.0647 0.3346 0.1961 0.0448 0.0893
Predicted class memberships (by modal posterior prob.)
0.151 0.104 0.0604 0.332 0.2048 0.0443 0.1035
=========================================================
Fit for 7 latent classes:
=========================================================
number of observations: 3789
number of estimated parameters: 195
residual degrees of freedom: 3594
maximum log-likelihood: -29481.27
AIC(7): 59352.54
BIC(7): 60569.31
G^2(7): 4687.721 (Likelihood ratio/deviance statistic)
X^2(7): 18576.53 (Chi-square goodness of fit)
# Capture the printed model summary and a one-row tidy table of fit statistics.
output_LCA_best_model_mod <- capture.output(LCA_best_model_mod)
glance_LCA_best_model_mod <- glance(LCA_best_model_mod)
# Attach predicted class membership and posterior probabilities to the data.
mydata_preds_LCA1 <- augment(LCA_best_model_mod, data = mydata_preds3)
# fig.height=15,
## If you are interested in the population-shares of the classes, you can get them like this:
warning(paste("Probabilidades posteriores: ",
  paste(round(colMeans(LCA_best_model_mod$posterior)*100, 2), collapse = ", ")
))
## or you inspect the estimated class memberships:
# FIX: "Probabildiades" -> "Probabilidades" (typo in user-facing message).
warning(paste("Probabilidades predichas: ",
  paste(round(prop.table(table(LCA_best_model_mod$predclass)), 4)*100, collapse = ", ")
))
# Lookup table translating item levels to category labels for plot tooltips.
# NOTE: "charactersitic" is the (misspelled) column name inside tabla12.xlsx;
# it must stay misspelled here for the joins below to match.
traductor_cats <- readxl::read_excel("tabla12.xlsx") %>%
  dplyr::mutate(level = readr::parse_double(level)) %>%
  dplyr::mutate(charactersitic = gsub(" \\(%\\)", "", charactersitic))
# Long-format class-conditional response probabilities for the stacked bars.
lcmodel <- reshape2::melt(LCA_best_model_mod$probs, level = 2)
lcmodel <- lcmodel %>%
  dplyr::mutate(pr = as.numeric(gsub("[^0-9.]+", "", Var2))) %>%
  dplyr::left_join(traductor_cats[, c("charactersitic", "level", "CATEGORIA")], by = c("L2" = "charactersitic", "pr" = "level"))
lcmodel$text_label <- paste0("Categoria:", lcmodel$CATEGORIA, "<br>%: ", scales::percent(lcmodel$value))
# Stacked-bar plot: one facet per latent class, one bar per manifest variable.
zp1 <- ggplot(lcmodel, aes(x = L2, y = value, fill = Var2, label = text_label))
zp1 <- zp1 + geom_bar(stat = "identity", position = "stack")
zp1 <- zp1 + facet_grid(Var1 ~ .)
zp1 <- zp1 + scale_fill_brewer(type = "seq", palette = "Greys", na.value = "white") + theme_bw()
# FIX: "Cateorías" -> "Categorías" (typo in the legend title shown to readers).
zp1 <- zp1 + labs(y = "Porcentaje de probabilidad de respuesta",
                  x = "",
                  fill = "Categorías de\nRespuesta")
zp1 <- zp1 + theme(axis.text.y = element_blank(),
                   axis.ticks.y = element_blank(),
                   panel.grid.major.y = element_blank())
zp1 <- zp1 + guides(fill = guide_legend(reverse = TRUE))
zp1 <- zp1 + theme(axis.text.x = element_text(angle = 30, hjust = 1))
#print(zp1)
# Interactive version with category/percentage tooltips.
ggplotly(zp1, tooltip = c("text_label")) %>% layout(xaxis = list(showticklabels = T), height = 600, width = 800)
Figure 1: Selected Model
#In this case, residuals are actual cell counts vs. expected cell counts.
# Bivariate residuals for the selected model (item-pair fit diagnostics);
# bvr() comes from the loaded workspace / poLCAParallel helpers.
bvr_LCA_best_model_mod<-bvr(LCA_best_model_mod)
#conditional probabilities
#Pr(B1=1|Class 3)
# Per-case posterior class probabilities plus the modal class assignment.
posteriors <- data.frame(LCA_best_model_mod$posterior, predclass=LCA_best_model_mod$predclass)
# Sum of posterior probabilities by assigned class: the diagonal holds the
# mass correctly concentrated in the assigned class; off-diagonal mass is
# classification uncertainty.
classification_table <- plyr::ddply(posteriors, "predclass", function(x) colSums(x[,1:length(LCA_best_model_ppio$P)]))
# Overall classification error = 1 - (diagonal mass / total mass).
clasification_errors<- 1 - sum(diag(as.matrix(classification_table[,2:(length(LCA_best_model_ppio$P)+1)]))) / sum(classification_table[,2:(length(LCA_best_model_ppio$P)+1)])
warning(paste("Error de clasificación: ", round(clasification_errors,2)))
Warning: Error de clasificación: 0.11
# Entropy-based pseudo-R^2: 1 - (mean posterior entropy / prior entropy).
# NOTE: entropy_alt() yields NaN whenever a probability is exactly 0
# (0 * log(0)); the "safe"/"brutal" variants below handle that case, which is
# why the mean uses na.rm=T here.
entropy_alt <- function(p) sum(-p * log(p))
error_prior <- entropy_alt(LCA_best_model_mod$P) # Class proportions
error_post <- mean(apply(LCA_best_model_mod$posterior, 1, entropy_alt),na.rm=T)
R2_entropy_alt <- (error_prior - error_post) / error_prior
warning(paste("Entropía: ", round(R2_entropy_alt,2)))
Warning: Entropía: 0.79
#https://stackoverflow.com/questions/72783185/entropy-calculation-gives-nan-is-applying-na-omit-a-valid-tweak
# Shannon entropy that treats 0 * log(0) as 0: logarithms are taken only for
# the nonzero probabilities, so zero entries contribute nothing (no NaN).
# Rejects any value outside [0, 1].
entropy.safe <- function(p) {
  if (any(p > 1 | p < 0)) stop("probability must be between 0 and 1")
  contrib <- numeric(length(p))
  nonzero <- p != 0
  contrib[nonzero] <- -p[nonzero] * log(p[nonzero])
  sum(contrib)
}
# Entropy pseudo-R^2 recomputed with the zero-safe entropy (no NaN rows lost).
error_prior2 <- entropy.safe(LCA_best_model_mod$P) # Class proportions
error_post2 <- mean(apply(LCA_best_model_mod$posterior, 1, entropy.safe),na.rm=T)
R2_entropy_safe <- (error_prior2 - error_post2) / error_prior2
warning(paste("Entropía segura: ", round(R2_entropy_safe,2)))
Warning: Entropía segura: 0.84
# "Brutal" entropy: computes p * log(p) directly, letting zero probabilities
# produce NaN (0 * -Inf), then drops those terms via na.rm = TRUE in the sum.
# Numerically equivalent to entropy.safe() for valid probability vectors.
entropy.brutal <- function(p) {
  if (any(p < 0 | p > 1)) stop("probability must be between 0 and 1")
  neg_terms <- p * log(p)
  ## as same as sum(na.omit(-p * log(p)))
  -sum(neg_terms, na.rm = TRUE)
}
# Entropy pseudo-R^2 with the NaN-dropping ("brutal") entropy; should agree
# with the "safe" version above (both printed 0.84 in this run).
error_prior3 <- entropy.brutal(LCA_best_model_mod$P) # Class proportions
error_post3 <- mean(apply(LCA_best_model_mod$posterior, 1, entropy.brutal),na.rm=T)
R2_entropy_brutal <- (error_prior3 - error_post3) / error_prior3
warning(paste("Entropía brutal: ", round(R2_entropy_brutal,2)))
Warning: Entropía brutal: 0.84
Warning: Entropía (solución Oberski): 0.84
#Selection criteria: minimum average posterior probability of cluster membership (>0.7), interpretability (classes are clearly distinguishable), and parsimony (each class has a sufficient sample size for further analysis; n≥50).
Ver si la exclusión de casos que no calzan en alguna clase tiene consecuencias.
#To evaluate whether the exclusion of cases would bias the LCA results, a sensitivity analysis was carried out. We conducted T-Tests and Wilcoxon–Mann–Whitney tests (for non-parametric data) to compare included and excluded records in terms of demographic and clinical background characteristics and baseline pain scores (all 638 patients completed pain intensity, frequency and impairment scales).
# Profile plot: class-conditional response probabilities per item, one line
# per latent class — an alternative view of Figure 1.
tidy(LCA_best_model_mod) %>%
# Renaming block kept from a previous analysis (naq items); not used here.
# dplyr::mutate(variable= dplyr::case_when(variable=="naq1"~"naq01",
# variable=="naq2"~"naq02",
# variable=="naq3"~"naq03",
# variable=="naq4"~"naq04",
# variable=="naq5"~"naq05",
# variable=="naq6"~"naq06",
# variable=="naq7"~"naq07",
# variable=="naq8"~"naq08",
# variable=="naq9"~"naq09",
# TRUE~variable)) %>%
ggplot(aes(outcome, estimate, color = factor(class), group = class)) +
geom_line() +
facet_wrap(~variable, nrow = 4) +
# NOTE: theme_bw() below resets this x-axis text tweak; kept as in original.
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
theme_bw()+
theme(legend.position = "bottom")
Figure 2: Modelo seleccionado
.
# Same label-stabilizing refit as above, now for the covariate-adjusted model:
# reorder starting values by decreasing class share, then refit.
new.probs.start_adj <- poLCA.reorder(LCA_best_model_adj_ppio$probs.start,
order(LCA_best_model_adj_ppio$P, decreasing = TRUE))
set.seed(2125)
# f_adj includes predictors of class membership, so poLCA also estimates
# multinomial-logit coefficients (printed in the output below).
LCA_best_model_adj_mod<-
poLCA(f_adj, mydata_preds3, nclass=length(LCA_best_model_adj_ppio$P),
maxiter=10000,tol=1e-5, na.rm=FALSE,nrep=1e3, verbose=TRUE, calc.se=TRUE,probs.start=new.probs.start_adj)
Conditional item response (column) probabilities,
by outcome variable, for each class (row)
$AÑO
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6)
class 1: 0 0.1586 0.2380 0.1962 0.2370 0.1702
class 2: 0 0.0851 0.2794 0.1378 0.2428 0.2549
class 3: 0 0.1696 0.1865 0.2106 0.1772 0.2561
class 4: 0 0.1725 0.2120 0.1820 0.2285 0.2050
class 5: 0 0.1280 0.2195 0.2111 0.2636 0.1778
class 6: 0 0.9454 0.0000 0.0000 0.0546 0.0000
$CAUSAL
Pr(1) Pr(2) Pr(3) Pr(4)
class 1: 0 0.1089 0.8911 0.0000
class 2: 0 0.9542 0.0458 0.0000
class 3: 0 0.0087 0.0000 0.9913
class 4: 0 0.0636 0.9364 0.0000
class 5: 0 0.3835 0.6165 0.0000
class 6: 0 0.7503 0.2497 0.0000
$EDAD_MUJER_REC
Pr(1) Pr(2) Pr(3) Pr(4)
class 1: 0.4931 0.0275 0.1946 0.2849
class 2: 0.6092 0.0036 0.1589 0.2284
class 3: 0.3873 0.3092 0.2087 0.0948
class 4: 0.5310 0.0000 0.0404 0.4286
class 5: 0.5816 0.0100 0.1292 0.2792
class 6: 0.5903 0.0183 0.1532 0.2382
$PAIS_ORIGEN_REC
Pr(1) Pr(2) Pr(3)
class 1: 0.0000 0.9646 0.0354
class 2: 0.0000 0.8838 0.1162
class 3: 0.0000 0.7789 0.2211
class 4: 0.0000 0.9300 0.0700
class 5: 0.0000 0.0000 1.0000
class 6: 0.0793 0.8133 0.1074
$HITO1_EDAD_GEST_SEM_REC
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6)
class 1: 0.0000 0.0000 0.2628 0.3725 0.2665 0.0983
class 2: 0.0739 0.1956 0.2374 0.4930 0.0000 0.0000
class 3: 0.0110 0.7843 0.2047 0.0000 0.0000 0.0000
class 4: 0.0153 0.0015 0.6473 0.2468 0.0791 0.0099
class 5: 0.0098 0.0170 0.3180 0.3909 0.2121 0.0522
class 6: 0.0495 0.0180 0.1190 0.2478 0.2612 0.3046
$MACROZONA
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6)
class 1: 0.0000 0.2938 0.1737 0.2426 0.0851 0.2047
class 2: 0.0000 0.3997 0.1570 0.1673 0.1148 0.1612
class 3: 0.0045 0.4217 0.1551 0.1173 0.1321 0.1692
class 4: 0.0000 0.6639 0.1189 0.0513 0.0804 0.0855
class 5: 0.0000 0.6475 0.0689 0.0129 0.2552 0.0154
class 6: 0.0338 0.1762 0.3782 0.1515 0.0852 0.1752
$PREV_TRAMO_REC
Pr(1) Pr(2) Pr(3) Pr(4) Pr(5)
class 1: 0.0013 0.0328 0.6340 0.3319 0.0000
class 2: 0.0012 0.0892 0.5605 0.3491 0.0000
class 3: 0.0070 0.0560 0.6802 0.1928 0.0641
class 4: 0.0000 0.5238 0.2023 0.2659 0.0080
class 5: 0.0054 0.0096 0.5915 0.2830 0.1105
class 6: 0.0210 0.0372 0.6410 0.2519 0.0489
Estimated class population shares
0.2974 0.1958 0.1946 0.1673 0.085 0.0599
Predicted class memberships (by modal posterior prob.)
0.3006 0.2059 0.1935 0.143 0.1006 0.0565
=========================================================
Fit for 6 latent classes:
=========================================================
2 / 1
Coefficient Std. error t value Pr(>|t|)
(Intercept) -0.94023 0.14060 -6.687 0
outcome1 0.67953 0.14872 4.569 0
=========================================================
3 / 1
Coefficient Std. error t value Pr(>|t|)
(Intercept) -1.75673 0.15781 -11.132 0
outcome1 1.60169 0.17163 9.332 0
=========================================================
4 / 1
Coefficient Std. error t value Pr(>|t|)
(Intercept) -2.91618 0.53214 -5.480 0
outcome1 2.66235 0.48860 5.449 0
=========================================================
5 / 1
Coefficient Std. error t value Pr(>|t|)
(Intercept) -2.03319 0.21796 -9.328 0
outcome1 0.98596 0.22479 4.386 0
=========================================================
6 / 1
Coefficient Std. error t value Pr(>|t|)
(Intercept) -2.61131 0.39063 -6.685 0.000
outcome1 1.24497 0.38146 3.264 0.001
=========================================================
number of observations: 3789
number of estimated parameters: 172
residual degrees of freedom: 3617
maximum log-likelihood: -29552.28
AIC(6): 59448.55
BIC(6): 60521.81
X^2(6): 21749.49 (Chi-square goodness of fit)
ALERT: estimation algorithm automatically restarted with new initial values
# Capture the printed summary and tidy fit statistics for the adjusted model.
output_LCA_best_model_adj_mod <- capture.output(LCA_best_model_adj_mod)
glance_LCA_best_model_adj_mod <- glance(LCA_best_model_adj_mod)
# Attach adjusted-model class membership and posteriors to the data.
mydata_preds_LCA2 <- augment(LCA_best_model_adj_mod, data = mydata_preds3)
# fig.height=15,
## If you are interested in the population-shares of the classes, you can get them like this:
warning(paste("Probabilidades posteriores: ",
  paste(round(colMeans(LCA_best_model_adj_mod$posterior)*100, 2), collapse = ", ")
))
## or you inspect the estimated class memberships:
# FIX: "Probabildiades" -> "Probabilidades" (typo in user-facing message).
warning(paste("Probabilidades predichas: ",
  paste(round(prop.table(table(LCA_best_model_adj_mod$predclass)), 4)*100, collapse = ", ")
))
# Long-format class-conditional response probabilities for the adjusted model.
lcmodel_adj <- reshape2::melt(LCA_best_model_adj_mod$probs, level = 2)
lcmodel_adj <- lcmodel_adj %>%
  dplyr::mutate(pr = as.numeric(gsub("[^0-9.]+", "", Var2))) %>%
  dplyr::left_join(traductor_cats[, c("charactersitic", "level", "CATEGORIA")], by = c("L2" = "charactersitic", "pr" = "level"))
lcmodel_adj$text_label <- paste0("Categoria:", lcmodel_adj$CATEGORIA, "<br>%: ", scales::percent(lcmodel_adj$value))
# Stacked-bar plot: one facet per latent class, one bar per manifest variable.
zp2 <- ggplot(lcmodel_adj, aes(x = L2, y = value, fill = Var2, label = text_label))
zp2 <- zp2 + geom_bar(stat = "identity", position = "stack")
zp2 <- zp2 + facet_grid(Var1 ~ .)
zp2 <- zp2 + scale_fill_brewer(type = "seq", palette = "Greys", na.value = "white") + theme_bw()
# FIX: "Cateorías" -> "Categorías" (typo in the legend title shown to readers).
zp2 <- zp2 + labs(y = "Porcentaje de probabilidad de respuesta",
                  x = "",
                  fill = "Categorías de\nRespuesta")
zp2 <- zp2 + theme(axis.text.y = element_blank(),
                   axis.ticks.y = element_blank(),
                   panel.grid.major.y = element_blank())
zp2 <- zp2 + guides(fill = guide_legend(reverse = TRUE))
zp2 <- zp2 + theme(axis.text.x = element_text(angle = 30, hjust = 1))
#print(zp2)
# Interactive version with category/percentage tooltips.
ggplotly(zp2, tooltip = c("text_label")) %>% layout(xaxis = list(showticklabels = T), height = 600, width = 800)
Figure 3: Selected Model
#In this case, residuals are actual cell counts vs. expected cell counts.
# Bivariate residuals for the adjusted model (item-pair fit diagnostics).
bvr_LCA_best_model_adj_mod <- bvr(LCA_best_model_adj_mod)
#conditional probabilities
#Pr(B1=1|Class 3)
# Per-case posterior probabilities plus modal class for the ADJUSTED model.
posteriors_adj <- data.frame(LCA_best_model_adj_mod$posterior,
  predclass=LCA_best_model_adj_mod$predclass)
# BUG FIX: the original passed `posteriors` (the UNADJUSTED model's data
# frame) to ddply here, so the adjusted classification error was computed on
# the wrong model. Use `posteriors_adj`.
classification_table_adj <- plyr::ddply(posteriors_adj, "predclass", function(x) colSums(x[,1:length(LCA_best_model_adj_mod$P)]))
# Overall classification error = 1 - (diagonal mass / total mass).
clasification_errors_adj <- 1 - sum(diag(as.matrix(classification_table_adj[,2:(length(LCA_best_model_adj_mod$P)+1)]))) / sum(classification_table_adj[,2:(length(LCA_best_model_adj_mod$P)+1)])
warning(paste("Error de clasificación: ", round(clasification_errors_adj,2)))
# Entropy pseudo-R^2 for the adjusted model (same caveat as above: NaN when a
# probability is exactly 0, hence na.rm=T in the mean).
entropy_alt <- function(p) sum(-p * log(p))
error_prior_adj <- entropy_alt(LCA_best_model_adj_mod$P) # Class proportions
error_post_adj <- mean(apply(LCA_best_model_adj_mod$posterior, 1, entropy_alt),na.rm=T)
R2_entropy_alt_adj <- (error_prior_adj - error_post_adj) / error_prior_adj
warning(paste("Entropía: ", round(R2_entropy_alt_adj,2)))
#https://stackoverflow.com/questions/72783185/entropy-calculation-gives-nan-is-applying-na-omit-a-valid-tweak
# Zero-safe Shannon entropy (redefined identically to the earlier chunk):
# log terms are filled in only where p != 0, so 0 * log(0) contributes 0
# instead of NaN. Values outside [0, 1] are rejected.
entropy.safe <- function(p) {
  if (any(p > 1 | p < 0)) stop("probability must be between 0 and 1")
  log_vals <- rep(0, length(p))
  pos <- which(p != 0)
  log_vals[pos] <- log(p[pos])
  -sum(p * log_vals)
}
# Entropy pseudo-R^2 for the adjusted model using the zero-safe entropy.
error_prior2_adj <- entropy.safe(LCA_best_model_adj_mod$P) # Class proportions
error_post2_adj <- mean(apply(LCA_best_model_adj_mod$posterior, 1, entropy.safe),na.rm=T)
R2_entropy_safe_adj <- (error_prior2_adj - error_post2_adj) / error_prior2_adj
# BUG FIX: the original reported `R2_entropy_safe` (the UNADJUSTED model's
# value, 0.84) instead of the just-computed `R2_entropy_safe_adj`.
warning(paste("Entropía segura: ", round(R2_entropy_safe_adj,2)))
Warning: Entropía (solución Oberski): 0.78
#Selection criteria: minimum average posterior probability of cluster membership (>0.7), interpretability (classes are clearly distinguishable), and parsimony (each class has a sufficient sample size for further analysis; n≥50).
Ver si la exclusión de casos que no calzan en alguna clase tiene consecuencias.
#To evaluate whether the exclusion of cases would bias the LCA results, a sensitivity analysis was carried out. We conducted T-Tests and Wilcoxon–Mann–Whitney tests (for non-parametric data) to compare included and excluded records in terms of demographic and clinical background characteristics and baseline pain scores (all 638 patients completed pain intensity, frequency and impairment scales).
# Profile plot for the adjusted model: class-conditional response
# probabilities per item, one line per latent class (alternative to Figure 3).
tidy(LCA_best_model_adj_mod) %>%
# Renaming block kept from a previous analysis (naq items); not used here.
# dplyr::mutate(variable= dplyr::case_when(variable=="naq1"~"naq01",
# variable=="naq2"~"naq02",
# variable=="naq3"~"naq03",
# variable=="naq4"~"naq04",
# variable=="naq5"~"naq05",
# variable=="naq6"~"naq06",
# variable=="naq7"~"naq07",
# variable=="naq8"~"naq08",
# variable=="naq9"~"naq09",
# TRUE~variable)) %>%
ggplot(aes(outcome, estimate, color = factor(class), group = class)) +
geom_line() +
facet_wrap(~variable, nrow = 4) +
# NOTE: theme_bw() below resets this x-axis text tweak; kept as in original.
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
theme_bw()+
theme(legend.position = "bottom")
Figure 4: Modelo seleccionado
# fig.cap= "Probabilidad predicha de presentar interrupción del embarazo por clase",
# fig.cap= "Probabilidad predicha de presentar interrupción del embarazo por clase",
# Predicted class-membership probabilities (with 95% CIs) by pregnancy
# interruption outcome, formatted as a markdown table.
# PERF FIX: the original called effects::allEffects() three times (once each
# for prob / lower.prob / upper.prob); compute the effects object once.
eff_outcome <- effects::allEffects(LCA_best_model_adj_mod, se = TRUE, confint = TRUE)$outcome
cbind(eff_outcome$prob, eff_outcome$lower.prob, eff_outcome$upper.prob) %>%
  data.frame() %>%
  dplyr::mutate_all(~round(., 2)) %>%
  # Build "estimate(95%CI=lo,hi)" strings, one column per latent class.
  dplyr::mutate(prob_c1= paste0(prob.X1, "(95%CI=",L.prob.X1,",",U.prob.X1,")"),prob_c2= paste0(prob.X2, "(95%CI=",L.prob.X2,",",U.prob.X2,")"), prob_c3= paste0(prob.X3, "(95%CI=",L.prob.X3,",",U.prob.X3,")"),prob_c4= paste0(prob.X4, "(95%CI=",L.prob.X4,",",U.prob.X4,")"),prob_c5= paste0(prob.X5, "(95%CI=",L.prob.X5,",",U.prob.X5,")"),prob_c6= paste0(prob.X6, "(95%CI=",L.prob.X6,",",U.prob.X6,")")) %>% dplyr::select(starts_with("prob_c")) %>%
  # Transpose so classes become rows; strip the "prob_c" prefix for labels.
  t() %>% data.table::data.table(keep.rownames=T) %>%
  dplyr::mutate(rn=gsub("prob_c","",rn)) %>%
  knitr::kable("markdown", caption="Probabilidad de pertenecer a una clase, según interrupción del embarazo", col.names = c("Clase","No interrumpe", "Interrumpe"))
| Clase | No interrumpe | Interrumpe |
|---|---|---|
| 1 | 0.18(95%CI=0.15,0.21) | 0.18(95%CI=0.16,0.19) |
| 2 | 0.23(95%CI=0.2,0.27) | 0.26(95%CI=0.25,0.28) |
| 3 | 0.21(95%CI=0.18,0.25) | 0.26(95%CI=0.25,0.28) |
| 4 | 0.16(95%CI=0.13,0.19) | 0.15(95%CI=0.14,0.17) |
| 5 | 0.12(95%CI=0.1,0.15) | 0.09(95%CI=0.08,0.1) |
| 6 | 0.09(95%CI=0.07,0.12) | 0.05(95%CI=0.05,0.06) |
#########################################################
#########################################################
### Posterior probability calculation ###
### Assign class based on maximum probability ###
### Note: additional prep for Table1 package ###
### 1) Convert all categorical variables to ###
### factors ###
### 2) Continuous variables as numeric ###
### 3) Pull out number from census strings ###
#########################################################
#########################################################
# NOTE(review): the banner above describes steps performed in a later stage;
# this chunk only persists the workspace for that next stage to load.
save.image("data2_lca3_sin_po.RData")
# Reproducibility appendix: interactive table of attached packages.
# FIX: library() errors loudly if tidyverse is missing; require() only warned.
library(tidyverse)
sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>%
  # FIX: the original headers said 'Variable'/'Percentage', which did not
  # match the data (package name, loaded version, install source).
  DT::datatable(filter = 'top',
    colnames = c('Row number' = 1, 'Package' = 2, 'Version' = 3, 'Source' = 4),
    caption = htmltools::tags$caption(
      style = 'caption-side: top; text-align: left;',
      '', htmltools::em('Packages')),
    options=list(
      initComplete = htmlwidgets::JS(
        "function(settings, json) {",
        "$(this.api().tables().body()).css({
          'font-family': 'Helvetica Neue',
          'font-size': '50%',
          'code-inline-font-size': '15%',
          'white-space': 'nowrap',
          'line-height': '0.75em',
          'min-height': '0.5em'
        });",#;
        "}")))